#####################################################
################ REPLICATION CODE FOR ###############
########### THE CONSEQUENCES OF COUNTING ############
########### World Politics January 2017 ############
########## EVAN LIEBERMAN AND PRERNA SINGH ##########
#####################################################


########## PRELIMINARIES ##########
## Set working directory to source file location
#setwd("xxx")

# Make sure packages are installed

library(foreign)
library(stargazer)
library(lmtest) 
library(rms)
library(ggplot2)
library(texreg)
library(car)
library(MASS)
library(dplyr)
library(xtable)

# Function to produce clustered standard errors

clse.f <- function(dat, fm, cluster){
  require(sandwich)
  require(lmtest)
  not <- attr(fm$model, "na.action")
  if( ! is.null(not)){
    cluster <- cluster[-not]
    dat <- dat[-not, ]
  }
  with(dat, {
    M <- length(unique(cluster))
    N <- length(cluster)
    K <- fm$rank
    dfc <- (M / (M-1)) * ((N-1) / (N-K))
    uj <- apply(estfun(fm), 2, function(x) tapply(x, cluster, sum));
    vcovCL <- dfc * sandwich(fm, meat = crossprod(uj) / N)
    coeftest(fm, vcovCL)
  }
  )
}

# Use this version for ordered logit models

clse.f2 <- function(dat, fm, cluster){
  require(sandwich)
  require(lmtest)
  not <- attr(fm$model, "na.action")
  if( ! is.null(not)){
    cluster <- cluster[-not]
    dat <- dat[-not, ]
  }
  with(dat, {
    M <- length(unique(cluster))
    N <- length(cluster)
    K <- dim(model.matrix(fm))[2]
    dfc <- (M / (M-1)) * ((N-1) / (N-K))
    uj <- apply(estfun(fm), 2, function(x) tapply(x, cluster, sum));
    vcovCL <- dfc * sandwich(fm, meat = crossprod(uj) / N)
    coeftest(fm, vcovCL)
  }
  )
}

########## LOADING THE DATA ##########

# Loading main dataset

census <- read.dta("censusdata.dta")
census$decade <- as.factor(census$decade)
census$region <- as.factor(census$region)

# Re-scaling Vanhanen's conflict indicators to between 0 and 1

census$vaninstconflict <- census$vaninstconf/100
census$vanviolconflict <- census$vanviolconf/100

# Loading dataset with just the codings of the census questionnaires
census.history <- read.csv("master_census.csv")
census.history$decade <- as.factor(census.history$decade)
census.history$region <- as.factor(census.history$region)

# Setting simulations for bootstrap

sims <- 1000

# Creating subsets of main census dataset for various analyses

census <- subset(census, year > 1945) # Restricting to post-1945 (outcomes only available for this period)
census.decade <- subset(census, decade == year) # Decades only
census.1995 <- subset(census, year == 1995) # 1995 only
censusexcept <- subset(census, ccodewb != "CHN" & ccodewb != "RUS") # Excluding China and Russia (because of number of groups)
censusexcept.decade <- subset(census, ccodewb != "CHN" & ccodewb != "RUS" & year == decade) # Excluding China and Russia, decades only

########## BEGIN FIGURES AND TABLES FOR MAIN ANALYSIS ##########

########## FIGURE 1: PATTERNS OF ETHNIC ENUMERATION 1800-2005 ##########

# Panel (a): Percent of Censuses Enumerating at Least One Ethnic Cleavage

par(mfrow = c(1, 1))
tiff("LiebermanFigure1a.tif", width = 8, height = 10, units = 'in', res = 300)
j <- table(census.history$decade, census.history$census_any_o)
k <- j[, 2] / (j[, 1] + j[, 2])
barplot(k, col = "gray", ylim = c(0, 1),
        main = "Percent of Censuses Enumerating at Least One Ethnic Cleavage\n")
dev.off()

# Panel (b): Number of Cleavages Enumerated, Share of All Censuses by Region

par(mfrow = c(1, 1))
tiff("LiebermanFigure1b.tif", width = 8, height = 10, units = 'in', res = 300)
l <- table(census.history$census_total_o, census.history$region)
m <- l
n <- l[1, ] + l[2, ] + l[3, ] + l[4, ] + l[5, ] + l[6, ]
for(i in 1:ncol(m)){
  m[,i] <- m[,i] / n[i]
}
regionlabel <- c("Adv.Ind.", "E.Europe", "Asia", "No.Afr/M.East", "Africa", "Lat. America")
colvector <- c("white", "gray90", "gray75", "gray60", "gray40", "black")
barplot(m,  
        main = "Number of Cleavages Enumerated, \nShare of All Censuses by Region",
        axes = TRUE, names.arg = regionlabel, xlim = c(0, 8), 
        col = colvector)
par(xpd = TRUE)
legend(7.4, 0.8, legend = rownames(m), fill = colvector, horiz = FALSE, cex = 0.9)
dev.off()

# Summary of enumeration for text

table(census.history$census_any_o)
table(census.history$census_any_o[census.history$decade == 1990])

########## TABLE 1: EFFECT OF ETHNIC ENUMERATION ON NUMBER OF POLITICALLY RELEVANT ETHNIC GROUPS, OLS ESTIMATES ##########

# Models

# Column 1

model.1.1 <- lm(groups ~ census_any_max10 + gdpcapl10 + lpopl + decade + indeplag,
                data = censusexcept.decade)
model.1.1.c <- clse.f(censusexcept.decade, model.1.1, as.numeric(censusexcept.decade$ccodecow))

# Column 2

model.2.1 <- lm(groups ~ census_any_max10 + gdpcapl10 + lpopl + colbrit + colfra 
                + anocl + regchg3 + polity2l + indeplag + decade, 
                data = censusexcept.decade)
model.2.1.c <- clse.f(censusexcept.decade, model.2.1, as.numeric(censusexcept.decade$ccodecow))

# Column 3

model.3.1 <- lm(groups ~ census_any_max10 + gdpcapl10 + numlang + lpopl + colbrit
                + colfra + anocl + regchg3 + polity2l + indeplag + decade,
                data = censusexcept.decade)
model.3.1.c <- clse.f(censusexcept.decade, model.3.1, as.numeric(censusexcept.decade$ccodecow))

# Column 4

model.4.1 <- lm(groups ~ census_any_max10 + gdpcapl10 + lpopl + colbrit + colfra
                + anocl + regchg3 + polity2l + abs2 + sdemean2 + sdsuit2 + emean2 
                + avg2 + prec2 + temp2 + km22 + sea2 + mig2 + pop952 + lp2 + ag2
                + indeplag + decade, data = censusexcept.decade)
model.4.1.c <- clse.f(censusexcept.decade, model.4.1, as.numeric(censusexcept.decade$ccodecow))

# Column 5

model.5.1 <- lm(groups ~ census_any_max10 + gdpcapl10 + colbrit + colfra + al_ethnic
                + al_religion + al_language + lpopl + anocl + regchg3 + polity2l
                + indeplag + decade, data = censusexcept.decade)
model.5.1.c <- clse.f(censusexcept.decade, model.5.1, as.numeric(censusexcept.decade$ccodecow))

# Column 6

model.6.1 <- lm(groups ~ census_any_max10 + gdpcapl + lpopl + anocl + regchg3 + polity2l
                + factor(ccodecow) - 1, data = censusexcept)
se.6.1 <- sqrt(diag(vcov(model.6.1)))

# Table

covars <- c(
  'Ethnic enumeration (any) lagged',
  'Log GDP/cap lag10',
  'Number languages',
  'Log GDP/cap lag1',
  'Log population',
  'British col.',
  'French col.',
  'Ethnic Fractionalization (AL)',
  'Religious Fractionalization (AL)',
  'Linguistic Fractionalization (AL)',
  'Anocracy',
  'Instability',
  'Polity',
  'Absolute latitude',
  'Variation in elevation',
  'Variation in land quality',
  'Mean elevation',
  'Mean land quality',
  'Mean precipitation',
  'Mean temperature',
  'Log area',
  'Distance from Sea',
  'Migratory distance from E.Africa',
  'Log Population density in 1995',
  'Log Population density in 1500',
  'Timing transition to agriculture',
  'Years since independence'
) 
depvars <- c("Number politically relevant ethnic groups")
stargazer(model.1.1, model.2.1, model.3.1, model.4.1,model.5.1, model.6.1,
          style = "ajps", covariate.labels = covars,
          dep.var.labels = depvars,
          se = list(model.1.1.c[,2], model.2.1.c[,2], model.3.1.c[,2], model.4.1.c[,2],  model.5.1.c[,2], se.6.1),
          title = "Effect of Ethnic Enumeration on Number of Politically Relevant Ethnic Groups (EPR), OLS Estimates (1946-2005)",
          omit.stat = c("f", "adj.rsq", "ser"),
          omit = c('decade', 'region','ccodecow'),
          model.numbers = FALSE,
          add.lines = list(c("Fixed effects?", "N", "N", "N", "N", "N", "Y"), c("Panel", "Decade", "Decade", "Decade", "Decade", "Decade", "Annual")),
          notes = ("Country clustered standard errors in parentheses.  Decade controls not shown.")
        )

########## FIGURE 2: ETHNIC ENUMERATION AS A PREDICTOR OF NUMBER OF POLITICALLY RELEVANT ETHNIC GROUPS (1946-2005) ##########

# Simulation loop begins here - define number of models, number of simulations (number sims set above)

models <- 5

# Create matrix to store output - a function of model #, sim #

outputepr <- matrix(NA, nrow = sims , ncol = models)
cluster <- names(table(censusexcept.decade$ccodecow))
cluster.len <- length(cluster)

# Begin simulation

for(i in 1:sims){
  # Sampling clusters from data set with replacement
  rows <- sample.int(cluster.len, replace = TRUE)
  cluster.temp <- table(cluster[rows])
  cd.temp <- NULL
  # Loop to generate data set from sampled clusters
  for(j in 1:max(cluster.temp)){
    cc <- censusexcept.decade[cluster %in% names(cluster.temp[cluster.temp %in% j]),]
    for(k in 1:j){
      cd.temp <- rbind(cd.temp, cc)
    }
  }
  # Run first model, then take difference in predicted values
  tempeg.1 <- lm(groups ~ census_any_max10 + gdpcapl10 + lpopl + decade + indeplag,
                data = cd.temp)
  # Predicted values (with ethnic enumeration)
  t <- predict(tempeg.1, data.frame(census_any_max10 = 1, gdpcapl10 = mean(cd.temp$gdpcapl10, na.rm = T),
                decade = "1970", indeplag = mean(cd.temp$indeplag, na.rm = T),
                lpopl = mean(cd.temp$lpopl, na.rm = T)), type = "response") 
  # Predicted values (without ethnic enumeration)
  c <- predict(tempeg.1, data.frame(census_any_max10  = 0, gdpcapl10 = mean(cd.temp$gdpcapl10, na.rm = T),
                decade = "1970", indeplag = mean(cd.temp$indeplag, na.rm = T),
                lpopl = mean(cd.temp$lpopl, na.rm = T)), type = "response") 
  outputepr[i, 1] <- t - c
  # Repeat for second model
  tempeg.2 <- lm(groups ~ census_any_max10 + gdpcapl10 + lpopl + colbrit + colfra + anocl + regchg3
                + polity2l + indeplag + decade, data = cd.temp)
  t <- predict(tempeg.2, data.frame(census_any_max10 = 1, gdpcapl10 = mean(cd.temp$gdpcapl10, na.rm = T),
                lpopl = mean(cd.temp$lpopl, na.rm = T), colbrit = 1, colfra = 0, anocl = mean(cd.temp$anocl, na.rm = T),
                regchg3 = mean(cd.temp$regchg3, na.rm = T), polity2l = mean(cd.temp$polity2l, na.rm = T),
                indeplag = mean(cd.temp$indeplag, na.rm = T), decade = "1970"), type = "response")
  c <-  predict(tempeg.2, data.frame(census_any_max10  = 0, gdpcapl10 = mean(cd.temp$gdpcapl10, na.rm = T),
                lpopl=mean(cd.temp$lpopl, na.rm = T), colbrit = 1, colfra = 0, anocl = mean(cd.temp$anocl, na.rm = T),
                regchg3 = mean(cd.temp$regchg3, na.rm = T), polity2l = mean(cd.temp$polity2l, na.rm = T),
                indeplag = mean(cd.temp$indeplag, na.rm = T), decade = "1970"), type = "response")
  outputepr[i, 2] <- t - c
 # Repeat for third model 
  tempeg.3 <- lm(groups ~ census_any_max10 + gdpcapl10 + numlang + lpopl + colbrit + colfra + anocl + regchg3
                + polity2l + indeplag + decade, data = cd.temp)
  t <- predict(tempeg.3, data.frame(census_any_max10  = 1, gdpcapl10 = mean(cd.temp$gdpcapl10, na.rm = T),
                numlang = mean(cd.temp$numlang, na.rm = T), lpopl = mean(cd.temp$lpopl, na.rm = T), colbrit = 1, colfra = 0,
                anocl = mean(cd.temp$anocl, na.rm = T), regchg3 = mean(cd.temp$regchg3, na.rm = T),
                polity2l = mean(cd.temp$polity2l, na.rm = T), indeplag = mean(cd.temp$indeplag, na.rm = T), decade = "1970"),
                type = "response")
  c <-  predict(tempeg.3, data.frame(census_any_max10  = 0, gdpcapl10 = mean(cd.temp$gdpcapl10, na.rm = T),
                numlang = mean(cd.temp$numlang, na.rm = T), lpopl = mean(cd.temp$lpopl, na.rm = T), colbrit = 1, colfra = 0,
                anocl = mean(cd.temp$anocl, na.rm = T), regchg3 = mean(cd.temp$regchg3, na.rm = T),
                polity2l = mean(cd.temp$polity2l, na.rm = T), indeplag = mean(cd.temp$indeplag, na.rm = T), decade = "1970"),
                type = "response")
  outputepr[i, 3] <- t - c
  # Repeat for fourth model
  tempeg.4 <- lm(groups ~ census_any_max10 + gdpcapl10 + lpopl + colbrit + colfra + anocl + regchg3 + polity2l
                + indeplag + decade, data = cd.temp)
  t <- predict(tempeg.4, data.frame(census_any_max10  = 1, gdpcapl10 = mean(cd.temp$gdpcapl10, na.rm = T),
                lpopl = mean(cd.temp$lpopl, na.rm = T), colbrit = 1, colfra = 0, anocl = mean(cd.temp$anocl, na.rm = T),
                regchg3 = mean(cd.temp$regchg3, na.rm = T), polity2l = mean(cd.temp$polity2l, na.rm = T),
                abs2 = mean(cd.temp$abs2, na.rm = T), sdemean2 = mean(cd.temp$sdemean2, na.rm = T),
                sdsuit2 = mean(cd.temp$sdsuit2, na.rm = T), emean2 = mean(cd.temp$emean2, na.rm = T),
                avg2 = mean(cd.temp$avg2, na.rm = T), prec2 = mean(cd.temp$prec2, na.rm = T),
                temp2 = mean(cd.temp$temp2, na.rm = T), km22 = mean(cd.temp$km22, na.rm = T),
                sea2 = mean(cd.temp$sea2, na.rm = T), mig2 = mean(cd.temp$mig2, na.rm = T), pop952 = mean(cd.temp$pop952, na.rm = T),
                lp2 = mean(cd.temp$lp2, na.rm = T), ag2 = mean(cd.temp$ag2, na.rm = T), indeplag = mean(cd.temp$indeplag, na.rm = T),
                decade = "1970"), type = "response")
  c <- predict(tempeg.4, data.frame(census_any_max10  = 0, gdpcapl10 = mean(cd.temp$gdpcapl10, na.rm = T),
                lpopl = mean(cd.temp$lpopl, na.rm = T), colbrit = 1, colfra = 0, anocl = mean(cd.temp$anocl, na.rm = T),
                regchg3 = mean(cd.temp$regchg3, na.rm = T), polity2l = mean(cd.temp$polity2l, na.rm = T),
                abs2 = mean(cd.temp$abs2, na.rm = T), sdemean2 = mean(cd.temp$sdemean2, na.rm = T),
                sdsuit2 = mean(cd.temp$sdsuit2, na.rm = T), emean2 = mean(cd.temp$emean2, na.rm = T),
                avg2 = mean(cd.temp$avg2, na.rm = T), prec2 = mean(cd.temp$prec2, na.rm = T),
                temp2 = mean(cd.temp$temp2, na.rm = T), km22 = mean(cd.temp$km22, na.rm = T),
                sea2 = mean(cd.temp$sea2, na.rm = T), mig2 = mean(cd.temp$mig2, na.rm = T), pop952 = mean(cd.temp$pop952, na.rm = T),
                lp2 = mean(cd.temp$lp2, na.rm = T), ag2 = mean(cd.temp$ag2, na.rm = T), indeplag = mean(cd.temp$indeplag, na.rm = T),
                decade = "1970"), type = "response")
  outputepr[i, 4] <- t - c
  # Repeat for fifth model
  tempeg.5 <- lm(groups ~ census_any_max10 + gdpcapl10 + colbrit + colfra + al_ethnic + al_religion + al_language + lpopl + anocl
                + regchg3 + polity2l + indeplag + decade, data = cd.temp)
  t <- predict(tempeg.5, data.frame(census_any_max10  = 1, gdpcapl10 = mean(cd.temp$gdpcapl10, na.rm = T), lpopl = mean(cd.temp$lpopl, na.rm = T),
                colbrit = 1, colfra = 0, anocl = mean(cd.temp$anocl, na.rm = T), regchg3 = mean(cd.temp$regchg3, na.rm = T),
                polity2l = mean(cd.temp$polity2l, na.rm = T), al_ethnic = mean(cd.temp$al_ethnic, na.rm = T), al_religion = mean(cd.temp$al_religion, na.rm = T),
                al_language = mean(cd.temp$al_language, na.rm = T), indeplag = mean(cd.temp$indeplag, na.rm = T),
                decade = "1970"), type = "response")
  c <- predict(tempeg.5, data.frame(census_any_max10  = 0, gdpcapl10 = mean(cd.temp$gdpcapl10, na.rm = T), lpopl = mean(cd.temp$lpopl, na.rm = T),
                colbrit = 1, colfra = 0, anocl = mean(cd.temp$anocl, na.rm = T), regchg3 = mean(cd.temp$regchg3, na.rm = T),
                polity2l = mean(cd.temp$polity2l, na.rm = T), al_ethnic = mean(cd.temp$al_ethnic, na.rm = T), al_religion = mean(cd.temp$al_religion, na.rm = T),
                al_language = mean(cd.temp$al_language, na.rm = T), indeplag = mean(cd.temp$indeplag, na.rm = T),
                decade = "1970"), type = "response")
  outputepr[i, 5] <- t - c
  # Counter to show how many loops have completed
  print(i)
}

# Calculate point estimates and bootstrapped confidence intervals

cis.egip <- matrix(NA, nrow = models, ncol = 3) # number of models specified above
for(i in 1:models){
  cis.egip[i, 1] <- mean(outputepr[,i])
  cis.egip[i, 2] <- quantile(outputepr[, i], 0.025)
  cis.egip[i, 3] <- quantile(outputepr[, i], 0.975)
}

# Produce plot

tiff("LiebermanFigure2.tif", width = 7, height = 4, units = 'in', res = 300)
par(mfrow = c(1, 1))
plot(cis.egip[, 1], ylim = c((-0.2 *(0.1 + max(cis.egip[, 3])) ), (0.1 + max(cis.egip[, 3]))),     
     xlim = c(0.5, (models + 0.5)), xlab = "", ylab = "Effect Size", xaxt = "n", pch = 16,
     main ="Number Ethnic Groups (EPR)" )
for(i in 1:models){lines(c(i, i), c(cis.egip[i, 2], cis.egip[i,3]))}
axis(tick = F, 1, 1:models, 
     c("Base\nmodel", "Addl\ncontrols", "Control \nNum langs","Geographic\nControls", 
       "Diversity\nControls"))
lines(c(0.5, (models + 0.5)), c(0, 0), lty = 2)
dev.off()

########## TABLE 2: EFFECT OF ETHNIC ENUMERATION ON ETHNIC VIOLENCE, LOGIT ESTIMATES (1946-2005) ##########

# DV: Vanhanen ethnic conflict indicator (1990-1996)

# Column 1

model.1.2 <- glm(vanethb ~ census_any_lag1 + gdpcapl10 + lpopl + indeplag + region, family = binomial(logit),
                data = census.1995)
model.1.2.c <- clse.f(census.1995, model.1.2, as.numeric(census.1995$ccodecow))

# Column 2

model.2.2 <- glm(vanethb ~ census_any_lag1 + gdpcapl10 + lpopl + colbrit + colfra + anocl + regchg3
                + polity2l + lrexclpop + region + indeplag, family = binomial (logit), data = census.1995)
model.2.2.c <- clse.f(census.1995, model.2.2, as.numeric(census.1995$ccodecow))

# DV: Minorities at Risk intergroup conflict indicator (1940-1999)

# Column 3

model.3.2 <- glm(marcomb ~ census_any_max10 + gdpcapl10 + lpopl + year + indeplag + region, family = binomial(logit),
                 data = census.decade)
model.3.2.c <- clse.f(census.decade, model.3.2, as.numeric(census.decade$ccodecow))

# Column 4

model.4.2 <- glm(marcomb ~ census_any_max10 + gdpcapl10 + lpopl + colbrit + colfra + anocl + regchg3
                + polity2l + lrexclpop + region + indeplag + year, family = binomial (logit), data = census.decade)
model.4.2.c <- clse.f(census.decade, model.4.2, as.numeric(census.decade$ccodecow))

# DV: Onset of ethnic armed conflict

# Column 5

model.5.2 <- glm(newethonset ~ census_any_lag1 + gdpcapl10 + lpopl + year + region + indeplag + nspline1 
                + nspline2 + nspline3, family = binomial(logit), data = census)
model.5.2.c <- clse.f(census, model.5.2, as.numeric(census$ccodecow))

# Column 6

model.6.2 <- glm(newethonset ~ census_any_lag1 + gdpcapl10 + lpopl + colbrit + colfra + anocl + regchg3
                + polity2l + lrexclpop + oilpcl + lmtnest + npeaceyears + ethwarhistb + region + indeplag
                + year + nspline1 + nspline2 + nspline3, family = binomial(logit), data = census)
model.6.2.c <- clse.f(census, model.6.2, as.numeric(census$ccodecow))

# Create table

covars <- c("Ethnic enumeration (lag1)", "Ethnic enumeration (max10)", "Log GDP/cap lag10", "Log pop lag1", "British col.", "French col.", "Anocracy", "Instability", "Polity", "Log Eth Excl Pop (EPR)", "Oil Prod per capita", "Mountainous", "Number peace years", "History ethnic war", "Year", "Years since indep.")
depvars <- c("Eth Viol.1990-6(Van)", "Eth Viol. (MAR)", "Eth Armed Conf (EPR)")

stargazer(model.1.2, model.2.2, model.3.2, model.4.2, model.5.2, model.6.2, 
          style = "ajps", covariate.labels = covars,
          dep.var.labels = depvars, se = list(model.1.2.c[, 2],model.2.2.c[, 2], model.3.2.c[, 2],
                    model.4.2.c[, 2], model.5.2.c[, 2], model.6.2.c[, 2]),
          title = "Effect of Ethnic Enumeration on Ethnic Violence, Logit Estimates (1946-2005)",
          omit.stat = c("f", "adj.rsq", "ser"), omit = c('decade', 'region', 'nspline1', 'nspline2', 'nspline3'),
          model.numbers = FALSE, label = "basemods2",
          notes = 'Region, decade controls, and cubic splines not shown. Standard errors in parentheses',
          add.lines =list(c("Panel","N","N","Decades","Decades", "Annual","Annual"),c("Clustered SEs","N","N","Y","Y","Y","Y"))
)

########## FIGURE 3: ETHNIC ENUMERATION AS A PREDICTOR OF ETHNIC CONFLICT AND ETHNIC VIOLENCE: BOOTSTRAPPED ESTIMATES ##########

# Begin figure

tiff("LiebermanFigure3.tif", width = 8, height = 5, units = 'in', res = 300)
par(mfrow = c(1, 3))

# DV: Vanhanen ethnic conflict indicator (1990-1996)

# Define number of models
models <- 2
# Create matrix to store output - a function of model #, sim # (defined above)
output.van <- matrix(NA, nrow=sims , ncol = models)
# Begin simulation
for(i in 1:sims){
  # Sample rows from data set with replacement
  rows <- sample(1:nrow(census.1995), nrow(census.1995), replace = T)
  # Create new dataset from sampled rows
  cd.temp.van <- census.1995[rows, ]
  # Run the first model
  temp.van.1 <- glm(vanethb ~ census_any_lag1 + gdpcapl10 + lpopl + region + indeplag, family = binomial(logit),
                  data = cd.temp.van)
  # Generate predicted value with ethnic enumeration set to 1 and other variables set to their mean or mode
  t <- predict(temp.van.1, data.frame(census_any_lag1 = 1, gdpcapl10 = mean(cd.temp.van$gdpcapl10, na.rm = T),
                  region = "6", indeplag = mean(cd.temp.van$indeplag, na.rm = T), lpopl = mean(cd.temp.van$lpopl, na.rm = T)),
                  type = "response")
  # Repeat with ethnic enumeration set to 0
  c <- predict(temp.van.1, data.frame(census_any_lag1 = 0, gdpcapl10 = mean(cd.temp.van$gdpcapl10, na.rm = T),
                  region = "6", indeplag = mean(cd.temp.van$indeplag, na.rm = T), lpopl = mean(cd.temp.van$lpopl, na.rm = T)),
                  type = "response")
  # Calculate difference in predicted values and store
  output.van[i, 1] <-  t - c
  # Repeat for the second model
  temp.van.2 <- glm(vanethb ~ census_any_lag1 + gdpcapl10 + lpopl + colbrit + colfra + anocl + regchg3 + polity2l
                  + lrexclpop + region + indeplag, family = binomial(logit), data = cd.temp.van)
  t <- predict(temp.van.2, data.frame(census_any_lag1 = 1, gdpcapl10 = mean(cd.temp.van$gdpcapl10, na.rm = T),
                  lpopl = mean(cd.temp.van$lpopl, na.rm = T), colbrit = 1, colfra = 0, anocl = mean(cd.temp.van$anocl, na.rm = T),
                  regchg3 = mean(cd.temp.van$regchg3, na.rm = T), polity2l = mean(cd.temp.van$polity2l, na.rm = T),
                  lrexclpop = mean(cd.temp.van$lrexclpop, na.rm = T), region = "6", indeplag = mean(cd.temp.van$indeplag, na.rm = T)),
                  type = "response")  
  c <- predict(temp.van.2, data.frame(census_any_lag1 = 0, gdpcapl10 = mean(cd.temp.van$gdpcapl10, na.rm = T), lpopl = mean(cd.temp.van$lpopl, na.rm = T),
                  colbrit = 1, colfra = 0, anocl = mean(cd.temp.van$anocl, na.rm = T), regchg3 = mean(cd.temp.van$regchg3, na.rm = T),
                  polity2l = mean(cd.temp.van$polity2l, na.rm = T), lrexclpop = mean(cd.temp.van$lrexclpop, na.rm = T), region = "6",
                  indeplag = mean(cd.temp.van$indeplag, na.rm = T)), type = "response")
  output.van[i, 2] <-  t - c
  # Counter to show how many loops have completed
  print(i)
}
# Calculate point estimates and bootstrapped confidence intervals for the QOI
cis.van <- matrix(NA, nrow = models, ncol = 3)
for(i in 1:models){
  cis.van[i, 1] <- mean(output.van[, i])
  cis.van[i, 2] <- quantile(output.van[, i], 0.025)
  cis.van[i, 3] <- quantile(output.van[, i], 0.975)
}
# Plot
plot(cis.van[, 1], ylim = c((-0.2 * (0.1 + max(cis.van[, 3]))), (0.1 + max(cis.van[, 3]))),     
     xlim = c(0.5, 2.5), xlab = "", ylab = "Change in Predicted Probabilities", xaxt = "n",
     pch = 16, main = "Ethnic Violence (Vanhanen)" )
for(i in 1:models){
  lines(c(i, i), c(cis.van[i, 2], cis.van[i, 3]))
}
axis(tick = FALSE, 1, 1:models, c("Base\nmodel", "Maximal\ncontrols"))
lines(c(0.5, 2.5), c(0, 0), lty = 2)

# DV: Minorities at Risk intergroup conflict indicator (1940-1999)

# Define number of models
models <- 2
# Create matrix to store output - a function of model #, sim #
output.mar <- matrix(NA, nrow = sims , ncol = models)
cluster <- names(table(census.decade$ccodecow))
cluster.len <- length(cluster)
for(i in 1:sims){
  # Sample clusters with replacement from the dataset
  rows <- sample.int(cluster.len, replace = T)
  cluster.temp <- table(cluster[rows])
  cd.temp.mar <- NULL
  # Loop to generate data set from sampled clusters
  for(j in 1:max(cluster.temp)){
    cc <- census[cluster %in% names(cluster.temp[cluster.temp %in% j]), ]
    for(k in 1:j){
      cd.temp.mar <- rbind(cd.temp.mar, cc)
    }
  }
  # Run the first model
  temp.mar.1 <- glm(marcomb ~ census_any_max10 + gdpcapl10 + lpopl + year + region + indeplag,
                  family = binomial(logit), data = cd.temp.mar)
  # Generate predicted value with ethnic enumeration set to 1 and other variables set to their mean or mode
  t <- predict(temp.mar.1, data.frame(census_any_max10 = 1, gdpcapl10 = mean(cd.temp.mar$gdpcapl10, na.rm = T),
                  lpopl = mean(cd.temp.mar$lpopl, na.rm = T), region = "6", indeplag = mean(cd.temp.mar$indeplag, na.rm = T),
                  year = mean(cd.temp.mar$year, na.rm = T)), type = "response")
  # Repeat with ethnic enumeration set to zero
  c <- predict(temp.mar.1, data.frame(census_any_max10 = 0, gdpcapl10 = mean(cd.temp.mar$gdpcapl10, na.rm = T),
                  lpopl = mean(cd.temp.mar$lpopl, na.rm = T), region = "6", indeplag = mean(cd.temp.mar$indeplag, na.rm = T),
                  year = mean(cd.temp.mar$year, na.rm = T)), type = "response")
  # Calculate difference in predicted values and store
  output.mar[i, 1] <- t - c
  # Repeat for second model
  temp.mar.2 <- glm(marcomb ~ census_any_max10 + gdpcapl10 + lpopl + colbrit + colfra + anocl + regchg3 + polity2l + lrexclpop
                  + region + indeplag + year, family = binomial(logit), data = cd.temp.mar)
  t <- predict(temp.mar.2, data.frame(census_any_max10 = 1, gdpcapl10 = mean(cd.temp.mar$gdpcapl10, na.rm = T),
                  lpopl = mean(cd.temp.mar$lpopl, na.rm = T), colbrit = 1, colfra = 0, anocl = mean(cd.temp.mar$anocl, na.rm = T),
                  regchg3 = mean(cd.temp.mar$regchg3, na.rm = T), polity2l = mean(cd.temp.mar$polity2l, na.rm = T), lrexclpop = mean(cd.temp.mar$lrexclpop, na.rm = T),
                  region = "6", indeplag = mean(cd.temp.mar$indeplag, na.rm = T), year = mean(cd.temp.mar$year, na.rm = T)), type = "response")  
  c <- predict(temp.mar.2, data.frame(census_any_max10 = 0, gdpcapl10 = mean(cd.temp.mar$gdpcapl10, na.rm = T),
                  lpopl = mean(cd.temp.mar$lpopl, na.rm = T), colbrit = 1, colfra = 0, anocl = mean(cd.temp.mar$anocl, na.rm = T),
                  regchg3 = mean(cd.temp.mar$regchg3, na.rm = T), polity2l = mean(cd.temp.mar$polity2l, na.rm = T), lrexclpop = mean(cd.temp.mar$lrexclpop, na.rm = T),
                  region = "6", indeplag = mean(cd.temp.mar$indeplag, na.rm = T), year = mean(cd.temp.mar$year, na.rm = T)), type = "response")  
  output.mar[i, 2] <- t - c
  print(i)
}
# Calculate point estimates and  bootstrapped confidence intervals for the QOI
cis.mar <- matrix(NA, nrow = models, ncol = 3)
for(i in 1:models){
  cis.mar[i, 1] <- mean(output.mar[, i])
  cis.mar[i, 2] <- quantile(output.mar[, i], 0.025)
  cis.mar[i, 3] <- quantile(output.mar[, i], 0.975)
}
# Plot
plot(cis.mar[, 1], ylim = c((-0.2 * (0.1 + max(cis.mar[, 3]))), (0.1 + max(cis.mar[, 3]))),     
     xlim = c(0.5, 2.5), xlab = "", ylab = "", xaxt = "n", pch = 16, main ="Ethnic Violence (MAR)" )
for(i in 1:models){
  lines(c(i, i), c(cis.mar[i, 2], cis.mar[i, 3]))
}
axis(tick = FALSE, 1, 1:models, c("Base\nmodel", "Maximal\ncontrols"))
lines(c(0.5, 2.5), c(0, 0), lty = 2)

# DV: Onset of ethnic armed conflict

# Define number of models
models <- 2
# Create matrix to store output - a function of model #, sim # (defined above)
output.eac <- matrix(NA, nrow = sims , ncol = models)
cluster <- names(table(census$ccodecow))
cluster.len <- length(cluster)
for(i in 1:sims){
  # Sample clusters from the dataset with replacement
  rows <- sample.int(cluster.len, replace = T)
  cluster.temp <- table(cluster[rows])
  cd.temp.eac <- NULL
  # Generate dataset from sampled clusters
  for(j in 1:max(cluster.temp)){
    cc <- census[cluster %in% names(cluster.temp[cluster.temp %in% j]), ]
    for(k in 1:j){
      cd.temp.eac <- rbind(cd.temp.eac, cc)
    }
  }
  # Run first model
  temp.eac.1 <- glm(newethonset ~ census_any_lag1 + gdpcapl10 + lpopl + year + region + indeplag,
                  family = binomial(logit), data = cd.temp.eac)
  # Generate predicted value with ethnic enumeration set to 1 and other variables set to their mean or mode
  t <- predict(temp.eac.1, data.frame(census_any_lag1 = 1, gdpcapl10 = mean(cd.temp.eac$gdpcapl10, na.rm = T),
                  lpopl = mean(cd.temp.eac$lpopl, na.rm = T), region = "6", indeplag = mean(cd.temp.eac$indeplag, na.rm = T),
                  year = mean(cd.temp.eac$year, na.rm = T)), type = "response") 
  # Repeat with ethnic enumeration set to zero
  c <- predict(temp.eac.1, data.frame(census_any_lag1 = 0, gdpcapl10 = mean(cd.temp.eac$gdpcapl10, na.rm = T),
                  lpopl = mean(cd.temp.eac$lpopl, na.rm = T), region = "6", indeplag = mean(cd.temp.eac$indeplag, na.rm = T),
                  year = mean(cd.temp.eac$year, na.rm = T)), type = "response") 
  # Calculate difference in predicted values and store
  output.eac[i, 1] <- t - c
  # Repeat for second model
  temp.eac.2 <- glm(newethonset ~ census_any_lag1 + gdpcapl10 + lpopl + colbrit + colfra + anocl + regchg3 + polity2l
                  + lrexclpop + oilpcl + lmtnest + npeaceyears + ethwarhistb + region + indeplag + year,
                  family = binomial(logit), data = cd.temp.eac)
  t <- predict(temp.eac.2, data.frame(census_any_lag1 = 1, gdpcapl10 = mean(cd.temp.eac$gdpcapl10, na.rm = T),
                  lpopl = mean(cd.temp.eac$lpopl, na.rm = T), colbrit = 1, colfra = 0, anocl = mean(cd.temp.eac$anocl, na.rm = T),
                  regchg3 = mean(cd.temp.eac$regchg3, na.rm = T), polity2l = mean(cd.temp.eac$polity2l, na.rm = T),
                  lrexclpop = mean(cd.temp.eac$lrexclpop, na.rm = T), oilpcl = mean(cd.temp.eac$oilpcl, na.rm = T),
                  lmtnest = mean(cd.temp.eac$lmtnest, na.rm = T), npeaceyears = mean(cd.temp.eac$npeaceyears, na.rm = T),
                  ethwarhistb = mean(cd.temp.eac$ethwarhistb, na.rm = T), region = "6", indeplag = mean(cd.temp.eac$indeplag, na.rm = T),
                  year = mean(cd.temp.eac$year, na.rm = T)), type = "response") 
  c <- predict(temp.eac.2, data.frame(census_any_lag1 = 0, gdpcapl10 = mean(cd.temp.eac$gdpcapl10, na.rm = T),
                  lpopl = mean(cd.temp.eac$lpopl, na.rm = T), colbrit = 1, colfra = 0, anocl = mean(cd.temp.eac$anocl, na.rm = T),
                  regchg3 = mean(cd.temp.eac$regchg3, na.rm = T), polity2l = mean(cd.temp.eac$polity2l, na.rm = T),
                  lrexclpop = mean(cd.temp.eac$lrexclpop, na.rm = T), oilpcl = mean(cd.temp.eac$oilpcl, na.rm = T),
                  lmtnest = mean(cd.temp.eac$lmtnest, na.rm = T), npeaceyears = mean(cd.temp.eac$npeaceyears, na.rm = T),
                  ethwarhistb = mean(cd.temp.eac$ethwarhistb, na.rm = T), region = "6", indeplag = mean(cd.temp.eac$indeplag, na.rm = T),
                  year = mean(cd.temp.eac$year, na.rm = T)), type = "response") 
  output.eac[i, 2] <- t - c
  print(i)
}
# Calculate point estimates and  bootstrapped confidence intervals for the QOI
cis.eac <- matrix(NA, nrow = models, ncol = 3)
for(i in 1:models){
  cis.eac[i, 1] <- mean(output.eac[, i])
  cis.eac[i, 2] <- quantile(output.eac[, i], 0.025)
  cis.eac[i, 3] <- quantile(output.eac[, i], 0.975)
}
# Plot
plot(cis.eac[, 1], ylim = c((-0.2 * (0.002 + max(cis.eac[,3]))), (0.002 + max(cis.eac[,3]))),          
     xlim = c(0.5, 2.5), xlab = "", ylab = "", xaxt = "n", pch = 16,
     main ="Ethnic Armed Conflict (EPR)")
for(i in 1:models){
  lines(c(i, i), c(cis.eac[i, 2], cis.eac[i,3]))
}
axis(tick = F, 1, 1 : models, c("Base\nmodel", "Maximal\ncontrols"))
lines(c(0.5, 2.5), c(0, 0), lty = 2)
# End figure
dev.off()

########## FIGURE 4: NUMBER OF ETHNIC CLEAVAGES ENUMERATED AS A PREDICTOR OF ARMED CONFLICT ONSET (1946-2005): BOOTSTRAPPED ESTIMATES ##########

# Begin figure
tiff("LiebermanFigure4.tif", width = 8, height = 10, units = 'in', res = 300)
par(mfrow = c(3, 2))
# Define number of models
models <- 5
# Create matrix to store output - a function of model #, sim # (defined above)
output.4 <- matrix(NA, nrow = sims , ncol = models)
cluster <- names(table(census$ccodecow))
cluster.len <- length(cluster)
# Begin simulation
for(i in 1:sims){
  # Sample clusters from the data set with replacement
  rows <- sample.int(cluster.len, replace = T)
  cluster.temp <- table(cluster[rows])
  cd.temp.4 <- NULL
  # Generate data set from sampled clusters
  for(j in 1:max(cluster.temp)){
    cc <- census[cluster %in% names(cluster.temp[cluster.temp %in% j]), ]
    for(k in 1:j){
      cd.temp.4 <- rbind(cd.temp.4, cc)
    }
  }
  # Run the model (DV is onset of new ethnic armed conflict)
  temp.4 <- glm(newethonset ~ census_total_lag1 + gdpcapl10 + lpopl + year + region + indeplag,
                family = binomial(logit), data = cd.temp.4)
  # Generate predicted values for number of enumerated groups set to 5 and all other variables set to mean or mode
  t5 <- predict(temp.4, data.frame(census_total_lag1 = 5, gdpcapl10 = mean(cd.temp.4$gdpcapl10, na.rm = T),
                lpopl = mean(cd.temp.4$lpopl, na.rm = T), region = "6", indeplag = mean(cd.temp.4$indeplag, na.rm = T),
                year = mean(cd.temp.4$year, na.rm = T)), type = "response") 
  # Generate predicted values for number of enumerated groups set to 4 and all other variables set to mean or mode
  t4 <- predict(temp.4, data.frame(census_total_lag1 = 4, gdpcapl10 = mean(cd.temp.4$gdpcapl10, na.rm = T),
                lpopl = mean(cd.temp.4$lpopl, na.rm = T), region = "6", indeplag = mean(cd.temp.4$indeplag, na.rm = T),
                year = mean(cd.temp.4$year, na.rm = T)), type = "response") 
  # Generate predicted values for number of enumerated groups set to 3 and all other variables set to mean or mode
  t3 <- predict(temp.4, data.frame(census_total_lag1 = 3, gdpcapl10 = mean(cd.temp.4$gdpcapl10, na.rm = T),
                lpopl = mean(cd.temp.4$lpopl, na.rm = T), region = "6", indeplag = mean(cd.temp.4$indeplag, na.rm = T),
                year = mean(cd.temp.4$year, na.rm = T)), type = "response") 
  # Generate predicted values for number of enumerated groups set to 2 and all other variables set to mean or mode
  t2 <- predict(temp.4, data.frame(census_total_lag1 = 2, gdpcapl10 = mean(cd.temp.4$gdpcapl10, na.rm = T),
                lpopl = mean(cd.temp.4$lpopl, na.rm = T), region = "6", indeplag = mean(cd.temp.4$indeplag, na.rm = T),
                year = mean(cd.temp.4$year, na.rm = T)), type = "response") 
  # Generate predicted values for number of enumerated groups set to 1 and all other variables set to mean or mode
  t1 <- predict(temp.4, data.frame(census_total_lag1 = 1, gdpcapl10 = mean(cd.temp.4$gdpcapl10, na.rm = T),
                lpopl = mean(cd.temp.4$lpopl, na.rm = T), region = "6", indeplag = mean(cd.temp.4$indeplag, na.rm = T),
                year = mean(cd.temp.4$year, na.rm = T)), type = "response") 
  # Generate predicted values for number of enumerated groups set to 0 and all other variables set to mean or mode
  c <- predict(temp.4, data.frame(census_total_lag1 = 0, gdpcapl10 = mean(cd.temp.4$gdpcapl10, na.rm = T),
                lpopl = mean(cd.temp.4$lpopl, na.rm = T), region = "6", indeplag = mean(cd.temp.4$indeplag, na.rm = T),
                year = mean(cd.temp.4$year, na.rm = T)), type = "response") 
  # Calculating difference in predicted values between each number of groups and zero
  output.4[i, 1] <- t1 - c
  output.4[i, 2] <- t2 - c
  output.4[i, 3] <- t3 - c
  output.4[i, 4] <- t4 - c
  output.4[i, 5] <- t5 - c
  # Counter to show how many loops have completed
  print(i)
}
# Calculate point estimates and bootstrapped confidence intervals for the QOI
cis.4 <- matrix(NA, nrow = models, ncol = 3)
for(i in 1:models){
  cis.4[i, 1] <- mean(output.4[, i])
  cis.4[i, 2] <- quantile(output.4[, i], 0.025)
  cis.4[i, 3] <- quantile(output.4[, i], 0.975)
}
# Plot
par(mfrow = c(1, 1))
plot(cis.4[, 1], ylim = c(0, 0.08), ylab = "Change in Predicted Probability", xlab = "Number of ethnic cleavages enumerated",
     xaxt = "n", pch = 16, main = "Likelihood of Ethnic Armed Conflict (EPR)")
for(i in 1:models){
  lines(c(i, i), c(cis.4[i, 2], cis.4[i,3]))
}
axis(tick = F, 1, 1:models, c("1", "2", "3", "4", "5"))
abline(h = 0,lty = 2)
# End figure
dev.off()

########## TABLE 3: EFFECT OF DIFFERENT FORMS OF IDENTITY ENUMERATION ON ETHNIC VIOLENCE, LOGIT ESTIMATES (1946-2005) ##########

# Enumeration variables and associated controls
ivarnames <- Cs(census_total_lag10 + al_ethnic + al_religion + numlang, 
                census_race_lag10 + al_ethnic, 
                census_lang_lag10 + numlang, 
                census_relig_lag10 + al_religion, 
                census_caste_lag10 + al_ethnic , 
                census_indig_lag10 + al_ethnic, 
                census_tribethoth_lag10 + al_ethnic)
# Controls for all models
controls <- "+ gdpcapl10 + lpopl + region + indeplag + year"
# Set up list to store models
modelfits.reg <- vector(length(ivarnames), mode = "list")
modelfits.reg.c <- vector(length(ivarnames), mode = "list")
census.reg <- census.decade
# Loop through enumeration variables - regress ethic violence (MAR) on each + controls
for(j in ivarnames){
  modelformula.reg <- paste("marcomb ~ ", j, controls)
  modelfits.reg[[j]] <- glm(as.formula(modelformula.reg), family = binomial(logit), data = census.reg)
  modelfits.reg.c[[j]] <- clse.f(census.reg, modelfits.reg[[j]], as.numeric(census.reg$ccodecow))
}
# Create table
covarsc <- c(
  'Ethnic enumeration (total) lag10',
  'Ethnic enumeration (race) lag10',
  'Ethnic enumeration (language) lag10',
  'Ethnic enumeration (religion) lag10',
  'Ethnic enumeration (caste) lag10',
  'Ethnic enumeration (indigenous) lag10',
  'Ethnic enumeration (tribe/ethnicity) lag10',
  'Ethnic diversity (AL)',
  'Number of languages',
  'Religious diversity (AL)'
) 
stargazer(modelfits.reg[[ivarnames[1]]], modelfits.reg[[ivarnames[2]]], modelfits.reg[[ivarnames[3]]], modelfits.reg[[ivarnames[4]]], 
          modelfits.reg[[ivarnames[5]]], modelfits.reg[[ivarnames[6]]],modelfits.reg[[ivarnames[7]]],
          style = "ajps", covariate.labels = covarsc,
          title = paste("Effect of different forms of identity enumeration on Ethnic Violence (MAR), Logit Estimates (1946-2005)") ,
          omit.stat = c("f", "adj.rsq", "ser"),
          keep = Cs(census_total_lag10,census_race_lag10, census_lang_lag10, census_relig_lag10,
                   census_caste_lag10,census_indig_lag10, census_tribethoth_lag10, 
                   al_ethnic,  numlang, al_religion),
          order = Cs(census_total_lag10,census_race_lag10, census_lang_lag10, census_relig_lag10,
                    census_caste_lag10,census_indig_lag10, census_tribethoth_lag10, 
                    al_ethnic, numlang, al_religion),
          model.numbers = FALSE,
          dep.var.labels = "Ethnic Violence (MAR)",
          label = "enumcatsmar",
          notes = 'Country-clustered standard errors in parentheses. Additional controls not shown'
  )

########## TABLE 4: EFFECT OF DIFFERENT FORMS OF IDENTITY ENUMERATION ON ETHNIC CIVIL WAR, LOGIT ESTIMATES (1946-2005) ##########

# Enumeration variables and associated controls
ivarnames <- Cs(census_total_lag1 + al_ethnic + al_religion + numlang, 
                census_race_lag1 + al_ethnic, 
                census_lang_lag1 + numlang, 
                census_relig_lag1 + al_religion, 
                census_caste_lag1 + al_ethnic , 
                census_indig_lag1 + al_ethnic, 
                census_tribethoth_lag1 + al_ethnic)
# Controls for all models
controls <- "+ gdpcapl10 + lpopl + region + indeplag + year"
# Set up list to store models
modelfits.reg <- vector(length(ivarnames), mode = "list")
modelfits.reg.c <- vector(length(ivarnames), mode = "list")
census.reg <- census
# Loop through enumeration variables - regress ethnic civil war (EPR) on each + controls
for(j in ivarnames) {
  modelformula.reg <- paste("newethonset ~ ", j, controls)
  modelfits.reg[[j]] <- glm(as.formula(modelformula.reg), family = binomial(logit), data = census.reg)
  modelfits.reg.c[[j]] <- clse.f(census.reg, modelfits.reg[[j]], as.numeric(census.reg$ccodecow))
}
# Create table
covarsc <- c(
  'Ethnic enumeration (total) lag1',
  'Ethnic enumeration (race) lag1',
  'Ethnic enumeration (language) lag1',
  'Ethnic enumeration (religion) lag1',
  'Ethnic enumeration (caste) lag1',
  'Ethnic enumeration (indigenous) lag1',
  'Ethnic enumeration (tribe/ethnicity) lag1',
  'Ethnic diversity (Alesina)',
  'Number of languages',
  'Religious diversity (Alesina)'
) 
stargazer(modelfits.reg[[ivarnames[1]]], modelfits.reg[[ivarnames[2]]], modelfits.reg[[ivarnames[3]]], modelfits.reg[[ivarnames[4]]], 
          modelfits.reg[[ivarnames[5]]], modelfits.reg[[ivarnames[6]]],modelfits.reg[[ivarnames[7]]],
          style = "ajps", covariate.labels = covarsc,
          dep.var.labels = "Ethnic Civil War (EPR)",
          title = paste("Effect of different forms of identity enumeration on Ethnic Civil War (EPR), Logit Estimates (1946-2005)") ,
          omit.stat = c("f", "adj.rsq", "ser"),
          keep = Cs(census_total_lag1,census_race_lag1, census_lang_lag1, census_relig_lag1,
                    census_caste_lag1,census_indig_lag1, census_tribethoth_lag1, 
                    al_ethnic,  numlang, al_religion),
          order = Cs(census_total_lag1,census_race_lag1, census_lang_lag1, census_relig_lag1,
                    census_caste_lag1,census_indig_lag1, census_tribethoth_lag1, 
                    al_ethnic, numlang, al_religion),
          model.numbers = FALSE,
          label="enumcatsciv",
          notes = 'Country-clustered standard errors in parentheses. Additional controls not shown'
)

########## TABLE 5: ETHNIC ENUMERATION AND ETHNIC CIVIL WAR, VARYING LAGS AND PLACEBO TESTS ##########

## Table 5 was created in Stata -- please see that file

########## TABLE 6: LOGIT ESTIMATES OF ETHNIC ENUMERATION AS A PREDICTOR OF ETHNONATIONALIST WARS ##########

## Table 6 was created in Stata -- please see that file

########## TABLE 7: ESTIMATES OF THE EFFECT OF ETHNONATIONALIST WARS ON LIKELIHOOD OF ETHNIC ENUMERATION (1946-2005) ##########

## Table 7 was created in Stata -- please see that file


########## TABLE 8: EFFECT OF ETHNIC ENUMERATION ON PEACE, NEGATIVE BINOMIAL ESTIMATES (1946-2005) ##########

# Column 1

model.1.8 <- glm.nb(npeaceyears ~ census_any_lag1 + gdpcapl10 + lpopl + colbrit + colfra + anocl + regchg3
                  + polity2l + lrexclpop + oilpcl + lmtnest + region + indeplag, data = census)
model.1.8.c <- clse.f(census, model.1.8, as.numeric(census$ccodecow))

# Column 2

model.2.8 <- glm.nb(npeaceyears ~ census_total_lag1 + gdpcapl10 + lpopl + colbrit + colfra + anocl + regchg3
                   + polity2l + lrexclpop + oilpcl + lmtnest + region + indeplag, data = census)
model.2.8.c <- clse.f(census, model.2.8, as.numeric(census$ccodecow))

# Creating table

covarsc <- c(
  'Ethnic enumeration (any) lag1',
  'Ethnic enumeration (total) lag1',
  'Log GDP/cap lag10',
  'Log population',
  'British col.',
  'French col.',
  'Anocracy',
  'Instability',
  'Polity',
  'Log Eth Excl Pop',
  'Oil per capita lag1',
  'Mountainous',
  'Years since independence'
) 

stargazer(model.1.8, model.2.8,
          style = "ajps",
          covariate.labels = covarsc,
          dep.var.labels = "Number Peace Years",
          title = paste("Effect of Ethnic Enumeration on Peace, Negative Binomial Estimates (1946-2005)") ,
          se = list(model.1.8.c[, 2], model.2.8.c[, 2]),
          omit.stat = c("f", "adj.rsq", "ser"),
          omit = c('region'),
          model.numbers = FALSE,
          label ="peace",
          notes = 'Country-clustered standard errors in parentheses. '
)

########## BEGIN FIGURES AND TABLES FOR APPENDIX ##########

########## TABLE A1: SUMMARY STATISTICS ##########

## ## Table A1 was created in Stata -- please see that file

########## TABLE 2: ORDERED LOGIT ESTIMATES OF THE EFFECT OF ETHNIC ENUMERATION ON ETHNIC VIOLENCE ##########

## ## Table A2 was created in Stata -- please see that file



########## TABLE 3: EFFECT OF ETHNIC ENUMERATION ON EXISTENCE OF POLITICALLY RELEVANT ETHNIC GROUPS (EPR), LOGIT ESTIMATES (1946-2005) ##########

# Column 1

model.1.3a <- glm(ethrelevant ~ census_any_max10 + gdpcapl10 + lpopl + decade 
                 + indeplag, family = binomial(logit), data = census.decade)
model.1.3a.c <- clse.f(census.decade, model.1.3a, as.numeric(census.decade$ccodecow))

# Column 2

model.2.3a <- glm(ethrelevant ~ census_any_max10  + gdpcapl10 + lpopl + colbrit
                + colfra + anocl + regchg3 + polity2l + indeplag + decade,
                family = binomial(logit), data = census.decade)
model.2.3a.c <- clse.f(census.decade, model.2.3a, as.numeric(census.decade$ccodecow))

# Column 3

model.3.3a <- glm(ethrelevant ~ census_any_max10 + gdpcapl10 + numlang + lpopl
                + colbrit + colfra + anocl + regchg3 + polity2l + indeplag
                + decade, family = binomial(logit), data = census.decade)
model.3.3a.c <- clse.f(census.decade, model.3.3a, as.numeric(census.decade$ccodecow))

# Column 4

model.4.3a <- glm(ethrelevant ~ census_any_max10 + gdpcapl10 + lpopl + colbrit
                + colfra + anocl + regchg3 + polity2l + abs2 + sdemean2 + sdsuit2 
                + emean2 + avg2 + prec2 + temp2 + km22 + sea2 + mig2 + pop952 + lp2
                + ag2 + indeplag + decade, family = binomial(logit), data = census.decade)
model.4.3a.c <- clse.f(census.decade, model.4.3a, as.numeric(census.decade$ccodecow))

# Column 5

model.5.3a <- glm(ethrelevant ~ census_any_max10 + gdpcapl10 + colbrit + colfra
                + al_ethnic + al_religion + al_language + lpopl + anocl + regchg3
                + polity2l + indeplag + decade, family = binomial(logit),
                data = census.decade)
model.5.3a.c <- clse.f(census.decade, model.5.3a, as.numeric(census.decade$ccodecow))

# Create table

covars <- c(
  'Ethnic enumeration (any) max10',
  'Log GDP/cap lag10',
  'Number languages',
  'Log pop lag1',
  'British col.',
  'French col.',
  'Ethnic Fractionalization (AL)',
  'Religious Fractionalization (AL)',
  'Linguistic Fractionalization (AL)',
  'Anocracy',
  'Instability',
  'Polity',
  'Absolute latitude',
  'Variation in elevation',
  'Variation in land quality',
  'Mean elevation',
  'Mean land quality',
  'Mean precipitation',
  'Mean temperature',
  'Log area',
  'Distance from Sea',
  'Migratory distance from E.Africa',
  'Log Population density in 1995',
  'Log Population density in 1500',
  'Timing transition to agriculture',
  'Years since independence'
) 
depvars <- c("Any politically relevant ethnic groups (EPR)?")
stargazer(model.1.3a, model.2.3a, model.3.3a, model.4.3a, model.5.3a,
          style = "ajps",
          covariate.labels = covars,
          dep.var.labels = depvars,
          se = list(model.1.3a.c[, 2], model.2.3a.c[, 2], model.3.3a.c[, 2], model.4.3a.c[, 2],  model.5.3a.c[, 2]),
          title = "Effect of Ethnic Enumeration on Existence of Politically Relevant Ethnic Groups (EPR), Logit Estimates (1946-2005)",
          omit.stat = c("f", "adj.rsq", "ser"),
          omit = c('decade', 'region'),
          model.numbers = FALSE,
          notes = ("Country clustered standard errors in parentheses.  Decade controls not shown.")
)

########## TABLE 4: EFFECT OF ETHNIC ENUMERATION ON POLITICAL RELEVANCE USING POSNER DATA: LOGIT ESTIMATES ##########

# Column 1

model.1.4a <- glm(pregb ~ census_any_max10 + gdpcapl10 + lpopl + indeplag,
                 family = binomial(logit), data = census.1995)

# Column 2

model.2.4a <- glm(pregb ~ census_any_max10 + gdpcapl10 + ethfrac + indeplag
                 + lpopl, family = binomial (logit), data = census.1995)

# Column 3

model.3.4a <- glm(pregb ~ census_total_max10 + gdpcapl10 + lpopl + indeplag,
                 family = binomial(logit), data = census.1995)

# Column 4

model.4.4a <- glm(pregb ~ census_total_max10 + gdpcapl10 + ethfrac + indeplag
                 + lpopl, family = binomial (logit), data = census.1995)

# Create table

covarsb <- c(
  'Ethnic enumeration (any)',
  'Ethnic enumeration (total)',
  'Log GDP/cap (lag10)',
  'Ethnic Fractionalization',
  'Log pop',
  'Time since independence'
) 
depvars <- c("Any politically relevant ethnic groups (Posner)?")
stargazer(model.1.4a, model.2.4a, model.3.4a, model.4.4a,
          style = "ajps",
          covariate.labels = covarsb,
          dep.var.labels = depvars,
          title = "Effect of Ethnic Enumeration on Political Relevance Using Posner Data: Logit Estimates",
          omit.stat = c("f", "adj.rsq", "ser"),
          model.numbers = FALSE,
          notes = 'Standard errors in parentheses.'
)

########## TABLE 5: EFFECT OF ETHNIC ENUMERATION ON ETHNIC POLITICAL CONFLICT USING VANHANEN DATA: OLS ESTIMATES ##########

# Column 1

model.1.5a <- lm(vaninstconflict ~ census_any_lag1 + gdpcapl10 + lpopl + indeplag,
                data = census.1995)

# Column 2

model.2.5a <- lm(vaninstconflict ~ census_any_lag1 + gdpcapl10 + lpopl + colbrit
                + colfra + anocl + regchg3 + polity2l + lrexclpop + indeplag,
                data = census.1995)

# Create table

covarsb <- c(
  'Ethnic enumeration, any (lag1)',
  'Log GDP/cap (lag10)',
  'Log population (lag1)',
  'British col.',
  'French col.',
  'Anocracy',
  'Instability',
  'Polity',
  'Ln ethnic excluded population share',
  'Years since independence'
) 
depvars <- c("Ethnic Political Conflict Index 1990-6 (Vanhanen)")
stargazer(model.1.5a, model.2.5a,
          style = "ajps",
         covariate.labels = covarsb,
          dep.var.labels = depvars,
          title = "Effect of Ethnic Enumeration on Ethnic Political Conflict Using Vanhanen Data: OLS Estimates",
          omit.stat = c("f", "adj.rsq", "ser"),
          omit = c('region'),
          model.numbers = FALSE,
          notes = 'Standard errors in parentheses.'
)

########## TABLES 6-8: EFFECT OF ETHNIC ENUMERATION ON VIOLENT CONFLICT, SENSITIVITY TO DIFFERENT ETHNIC DIVERSITY CONTROLS ##########

# Ethnic diversity variables
ivarnames <- Cs(ethfrac, relfrac, al_ethnic, al_language, al_religion, lrexclpop, egipgrps)
# Creating lists to store models for each DV
modelfits.van <- vector(length(ivarnames), mode = "list") # Vanhanen
modelfits.mar <- vector(length(ivarnames), mode = "list") # MAR
modelfits.mar.c <- vector(length(ivarnames), mode = "list")
modelfits.civ <- vector(length(ivarnames), mode = "list") # EPR
modelfits.civ.c <- vector(length(ivarnames), mode = "list")

# Regressing each DV on all ethnic diversity variables

for(i in ivarnames) {
  # Vanhanen
  modelformula.van <- paste("vanethb ~ census_any_lag1 + gdpcapl10 + lpopl + colbrit + colfra + anocl + regchg3 + polity2l + region + indeplag +", i)
  modelfits.van[[i]] <- glm(as.formula(modelformula.van), family = binomial(logit), data = census.1995)
  # MAR
  modelformula.mar <- paste("marcomb ~ census_any_max10 + gdpcapl10 + lpopl + colbrit + colfra + anocl + regchg3 + polity2l + region + indeplag + decade +", i)
  modelfits.mar[[i]] <- glm(as.formula(modelformula.mar), family = binomial(logit), data = census.decade)
  modelfits.mar.c[[i]] <- clse.f(census.decade, modelfits.mar[[i]], as.numeric(census.decade$ccodecow))
  # Armed ethnic conflict onset (EPR)
  modelformula.civ <- paste("newethonset ~ census_any_lag1 + gdpcapl10 + lpopl + colbrit + colfra + anocl + regchg3 + polity2l + oilpcl + lmtnest + npeaceyears + ethwarhistb + region + indeplag + year + nspline1  + nspline2 + nspline3 +", i)
  modelfits.civ[[i]] <- glm(as.formula(modelformula.civ), family = binomial(logit), data = census)
  modelfits.civ.c[[i]] <- clse.f(census, modelfits.civ[[i]], as.numeric(census$ccodecow))
  
}

# Table 6 (Vanhanen Data)

covarsc <- c(
    'Ethnic enumeration (any) lag1',
    'Log GDP/cap lag10',
    'Log pop lag1',
    'British col.',
    'French col.',
    'Anocracy',
    'Instability',
    'Polity',
    'Years since independence',
    'Eth Frac.',
    'Rel Frac.',
    'Ethnic Fractionalization (AL)',
    'Linguistic Fractionalization (AL)',
    'Religious Fractionalization (AL)',
    'Log Percent Eth Excl Pop',
    'N Eth Grps in Power'
) 
stargazer(modelfits.van[[ivarnames[1]]], modelfits.van[[ivarnames[2]]], modelfits.van[[ivarnames[3]]], modelfits.van[[ivarnames[4]]], 
          modelfits.van[[ivarnames[5]]], modelfits.van[[ivarnames[6]]],modelfits.van[[ivarnames[7]]],
          style = "ajps",
          title = "Effect of ethnic enumeration on violent ethnic conflict (Vanhanen), Sensitivity to different ethnic diversity controls",
          omit.stat = c("f", "adj.rsq", "ser"),
          omit = c('region'),
          model.numbers = FALSE,
          dep.var.labels = "Ethnic Conflict",
          covariate.labels = covarsc,
          label = "ethrobustvan2",
          notes = "Regional dummies not shown"
)

# Table 7 (MAR data)

covarsc <- c(
  'Ethnic enumeration (any) max10',
  'Log GDP/cap lag10',
  'Log pop lag1',
  'British col.',
  'French col.',
  'Anocracy',
  'Instability',
  'Polity',
 'Years since independence',
  'Eth Frac.',
  'Rel Frac.',
  'Ethnic Fractionalization (AL)',
  'Linguistic Fractionalization (AL)',
  'Religious Fractionalization (AL)',
  'Log Percent Eth Excl Pop',
  'N Eth Grps in Power'
) 
stargazer(modelfits.mar[[ivarnames[1]]], modelfits.mar[[ivarnames[2]]], modelfits.mar[[ivarnames[3]]], modelfits.mar[[ivarnames[4]]], 
          modelfits.mar[[ivarnames[5]]], modelfits.mar[[ivarnames[6]]], modelfits.mar[[ivarnames[7]]],
          style = "ajps",
          title = "Effect of ethnic enumeration on ethnic violence (MAR), Sensitivity to different ethnic diversity controls",
          omit.stat = c("f", "adj.rsq", "ser"),
          omit = c('region', 'decade'),
          model.numbers = FALSE,
          dep.var.labels = "Ethnic Violence",
          covariate.labels = covarsc,
          object.names = FALSE,
          se = list(modelfits.mar.c[[ivarnames[1]]][,2], modelfits.mar.c[[ivarnames[2]]][,2], modelfits.mar.c[[ivarnames[3]]][,2], 
                    modelfits.mar.c[[ivarnames[4]]][,2], modelfits.mar.c[[ivarnames[5]]][,2], modelfits.mar.c[[ivarnames[6]]][,2], 
                    modelfits.mar.c[[ivarnames[7]]][,2]),
          label = "ethrobustmar",
          notes = "Regional dummies not shown"        
)

# Table 8 (EPR data)

covarsc <- c(
  'Ethnic enumeration (any) lag1',
  'Log GDP/cap lag10',
  'Log pop lag1',
  'British col.',
  'French col.',
  'Anocracy',
  'Instability',
  'Polity',
  'Oil per capita lag',
  'Log mountainous',
  'N Peace Years',
  'History ethnic war',
  'Years since independence',
  'Eth Frac.',
  'Rel Frac.',
  'Ethnic Fractionalization (AL)',
  'Linguistic Fractionalization (AL)',
  'Religious Fractionalization (AL)',
  'Log Percent Eth Excl Pop',
  'N Eth Grps in Power'
) 
stargazer(modelfits.civ[[ivarnames[1]]], modelfits.civ[[ivarnames[2]]], modelfits.civ[[ivarnames[3]]], modelfits.civ[[ivarnames[4]]], 
          modelfits.civ[[ivarnames[5]]], modelfits.civ[[ivarnames[6]]], modelfits.civ[[ivarnames[7]]],
          style = "ajps",
          se = list(modelfits.civ.c[[ivarnames[1]]][,2], modelfits.civ.c[[ivarnames[2]]][,2], 
                    modelfits.civ.c[[ivarnames[3]]][,2], modelfits.civ.c[[ivarnames[4]]][,2], modelfits.civ.c[[ivarnames[5]]][,2],
                    modelfits.civ.c[[ivarnames[6]]][,2], modelfits.civ.c[[ivarnames[7]]][,2]),
          title = "Effect of ethnic enumeration on ethnic armed conflict onset (epr), Sensitivity to different ethnic diversity controls",
          omit.stat = c("f", "adj.rsq", "ser"),
          model.numbers = FALSE,
          dep.var.labels = "Ethnic Armed Conflict Onset",
          covariate.labels = covarsc,
          object.names = FALSE,
          omit = c('region', 'nspline1', 'nspline2', 'nspline3'),
          label = "ethrobustciv",
          notes = "Regional dummies and splines not shown"
)

########## TABLE 9: EFFECT OF ETHNIC ENUMERATION ON NUMBER OF POLITICALLY RELEVANT ETHNIC GROUPS (EPR), OLD ESTIMATES (1946-2005), EXCLUDING SEVERE OUTLIER CASES ##########

# Column 1

model.1.9a <- lm(groups ~ census_any_max10 + gdpcapl10 + lpopl + decade + indeplag,
                data = census.decade)
model.1.9a.c <- clse.f(census.decade, model.1.9a, as.numeric(census.decade$ccodecow))

# Column 2

model.2.9a <- lm(groups ~ census_any_max10 + gdpcapl10 + lpopl + colbrit + colfra
                + anocl + regchg3 + polity2l + indeplag + decade, data = census.decade)
model.2.9a.c <- clse.f(census.decade, model.2.9a, as.numeric(census.decade$ccodecow))

# Column 3

model.3.9a <- lm(groups ~ census_any_max10 + gdpcapl10 + numlang + lpopl + colbrit
                + colfra + anocl + regchg3 + polity2l + indeplag + decade,
                data = census.decade)
model.3.9a.c <- clse.f(census.decade, model.3.9a, as.numeric(census.decade$ccodecow))

# Column 4

model.4.9a <- lm(groups ~ census_any_max10 + gdpcapl10 + lpopl + colbrit + colfra
                + anocl + regchg3 + polity2l + abs2 + sdemean2 + sdsuit2 + emean2 
                + avg2 + prec2 + temp2 + km22 + sea2 + mig2 + pop952 + lp2 + ag2
                + indeplag + decade, data = census.decade)
model.4.9a.c <- clse.f(census.decade, model.4.9a, as.numeric(census.decade$ccodecow))

# Column 5

model.5.9a <- lm(groups ~ census_any_max10 + gdpcapl10 + colbrit + colfra + al_ethnic
                + al_religion + al_language + lpopl + anocl + regchg3 + polity2l
                + indeplag + decade, data = census.decade)
model.5.9a.c <- clse.f(census.decade, model.5.9a, as.numeric(census.decade$ccodecow))

# Column 6

model.6.9a <- lm(groups ~ census_any_max10 + gdpcapl + lpopl + anocl + regchg3 + polity2l
                + factor(ccodecow) - 1, data = census)
se.6.9a <- sqrt(diag(vcov(model.6.9a)))

# Create table

covars <- c(
  'Ethnic enumeration (any) lagged',
  'Log GDP/cap lag10',
  'Number languages',
  'Log GDP/cap lag1',
  'Log population',
  'British col.',
  'French col.',
  'Ethnic Fractionalization (AL)',
  'Religious Fractionalization (AL)',
  'Linguistic Fractionalization (AL)',
  'Anocracy',
  'Instability',
  'Polity',
  'Absolute latitude',
  'Variation in elevation',
  'Variation in land quality',
  'Mean elevation',
  'Mean land quality',
  'Mean precipitation',
  'Mean temperature',
  'Log area',
  'Distance from Sea',
  'Migratory distance from E.Africa',
  'Log Population density in 1995',
  'Log Population density in 1500',
  'Timing transition to agriculture',
  'Years since independence'
) 
depvars <- c("Number politically relevant ethnic groups")
stargazer(model.1.9a, model.2.9a, model.3.9a, model.4.9a, model.5.9a, model.6.9a,
          style = "ajps",
          covariate.labels = covars,
          dep.var.labels = depvars,
          se = list(model.1.9a.c[, 2], model.2.9a.c[, 2], model.3.9a.c[, 2], model.4.9a.c[, 2],  model.5.9a.c[, 2], se.6.9a),
          title = "Effect of Ethnic Enumeration on Number of Politically Relevant Ethnic Groups (EPR), OLS Estimates (1946-2005)",
          omit.stat = c("f", "adj.rsq", "ser"),
          omit = c('decade', 'region','ccodecow'),
          model.numbers = FALSE,
          add.lines =list(c("Fixed effects?","N","N","N","N","N","Y"), c("Panel","Decade","Decade","Decade","Decade","Decade","Annual")),
          notes = ("Country clustered standard errors in parentheses.  Decade controls not shown.")
)

########## FIGURE 2: NUMBER, POLITICALLY RELEVANT ETHNIC GROUPS (EPR) ##########

par(mfrow = c(1,1))
tiff(file = "LiebermanFigure2_appendix.tif")
m <- ggplot(census, aes(x = groups))
m + geom_histogram(aes(y =..density..), colour = "lightgoldenrod2", fill = "lightgoldenrod3", 
                   binwidth = 1.95) + labs(x = "Number of ethnopolitically relevant groups")
dev.off()

########## FIGURE 3: SENSITIVITY ANALYSES ##########

# Robustness to country exclusions

# List of countries
countries <- unique(census.decade$ccodecow)
# Create vectors to store p-values for each model
output.c.van <- c()
output.c.mar <- c()
output.c.epr <- c()
# Loop through list of countries, run three models with each excluded
for(i in 1:length(countries)){
  # Create three subsets with country excluded (decade-only, all, 1995 only)
  census.decade.c <- subset(census.decade, ccodecow != countries[i])
  census.c <- subset(census, ccodecow != countries[i])
  census.1995.c <- subset(census.1995, ccodecow != countries[i])
  # Vanhanen
  model.van <- glm(vanethb ~ census_any_lag1 + gdpcapl10 + lpopl + indeplag
                   + region, family = binomial(logit), data = census.1995.c)
  model.van.c <- clse.f(census.1995.c, model.van, as.numeric(census.1995.c$ccodecow))
  output.c.van[i] <- model.van.c[2, 4]
  # MAR
  model.mar <- glm(marcomb ~ census_any_max10 + gdpcapl10 + lpopl + decade + indeplag
                   + region, family = binomial(logit), data = census.decade.c)
  model.mar.c <- clse.f(census.decade.c, model.mar, as.numeric(census.decade.c$ccodecow))
  output.c.mar[i] <- model.mar.c[2, 4]
  # Ethnic war onset (EPR)
  model.epr <- glm(newethonset ~ census_any_lag1 + gdpcapl10 + lpopl + year + region
                   + indeplag + nspline1 + nspline2 + nspline3, family = binomial(logit),
                   data = census.c)
  model.epr.c <- clse.f(census.c, model.epr, as.numeric(census.c$ccodecow))
  output.c.epr[i] <- model.epr.c[2, 4]
  # Counter
  print(i)
}
# Plot
tiff("LiebermanFigure3a_appendix.tif", width = 8, height = 10, units = 'in', res = 300)
plot(output.c.van, type = "p", ylim = c(0, 0.15),  pch = 2, xlab = "Excluded country (in order of COW code)",
     ylab = "p-value of estimated census enumeration coefficient", 
     main = "Robustness of Base Models\n to Exclusion of Individual Country Cases")
par(new = T)
plot(output.c.mar, type = "p", ylim = c(0, 0.15), pch = 3, xlab= "", ylab = "", 
     main = "")
par(new = T)
plot(output.c.epr, type= "p", ylim = c(0, 0.15), pch = 4, xlab= "", ylab = "", 
     main = "")
par(new = F)
pointypes <- c(2, 3, 4)
# Legend
models <- c("Van eth viol", "MAR eth viol", "Eth Arm Conf")
legend(110, 0.13, models, pch = pointypes)
lines(c(0,length(countries)), c(0.05, 0.05), lty = 2)
lines(c(0,length(countries)), c(0.10, 0.10), lty = 3)
dev.off()

# Robustness to region exclusions

regions <- c(0, 2, 3, 5, 6, 7)
# Create vectors to store p-values for each model
output.r.van <- c()
output.r.mar <- c()
output.r.epr <- c()
# Loop through list of regions, run three models with each excluded
for(i in 1:length(regions)){
  census.decade.r <- subset(census.decade, region != regions[i])
  census.r <- subset(census, region != regions[i])
  census.1995.r <- subset(census.1995, region != regions[i])
  # Vanhanen
  model.van <- glm(vanethb ~ census_any_lag1 + gdpcapl10 + lpopl + indeplag
                   + region, family = binomial(logit), data = census.1995.r)
  model.van.c <- clse.f(census.1995.r, model.van, as.numeric(census.1995.r$ccodecow))
  output.r.van[i] <- model.van.c[2, 4]
  # MAR
  model.mar <- glm(marcomb ~ census_any_max10 + gdpcapl10 + lpopl + decade
                   + indeplag + region, family = binomial(logit),
                   data = census.decade.r)
  model.mar.c <- clse.f(census.decade.r, model.mar, as.numeric(census.decade.r$ccodecow))
  output.r.mar[i] <- model.mar.c[2, 4]
  # EPR
  model.epr <- glm(newethonset ~ census_any_lag1 + gdpcapl10 + lpopl + year
                   + region + indeplag + nspline1 + nspline2 + nspline3,
                   family = binomial(logit), data = census.r)
  model.epr.c <- clse.f(census.r, model.epr, as.numeric(census.r$ccodecow))
  output.r.epr[i] <- model.epr.c[2, 4]
  # Counter
  print(i)
}
# Plot
tiff("LiebermanFigure3b_appendix.tif", width = 8, height = 10, units = 'in', res = 300)
regionlabel <- c("W.Eur/Adv.Ind.", "E.Europe", "Asia", "No.Afr/M.East", "Africa", "Lat. America")
plot(output.r.van, type = "p", ylim = c(0, 0.15), pch = 2, xlab = "Excluded Region ", ylab = "p-value of estimated census enumeration coefficient", 
     main = "Robustness of Base Models\n to Exclusion of Individual Regions", xaxt = "n")
axis(1, at = c(1:6), labels = regionlabel)
par(new = T)
plot(output.r.mar, type = "p", ylim = c(0, 0.15), pch = 3, xlab = "", ylab = "", 
     main = "", xaxt = "n")
par(new = T)
plot(output.r.epr, type =  "p", ylim = c(0 , 0.15), pch = 4, xlab = "", ylab = "", 
     main = "", xaxt = "n")
par(new = F)
pointypes <- c(2,3,4)
# Legend
legend(5, 0.13, models, pch = pointypes )
lines(c(1,length(regions)), c(0.05, 0.05), lty = 2)
lines(c(1,length(regions)), c(0.10, 0.10), lty = 3)
dev.off()

########## TABLE 10: RARE EVENTS LOGIT ESTIMATES OF ETHNIC ENUMERATION AS PREDICTOR OF ETHNIC ARMED CONFLICT (1946-2005) ##########

## ## Table A10 was created in Stata -- please see that file



########## TABLE 11: LOGIT ESTIMATES OF THE INTERACTION OF ETHNIC ENUMERATION AND CROSS CUTTING ETHNIC CLEAVAGES ON OUTBREAK OF ETHNIC CIVIL WAR (1946-2005) ##########

## ## Table A11 was created in Stata -- please see that file

\

########## FIGURE 4: ENUMERATION OF RELIGION AND LANGUAGE CATEGORIES AS PREDICTORS OF LIKELIHOOD OF NEW ETHNIC ARMED CONFLICT ONSET, CONDITIONAL ON DEGREE OF CROSS-CUTTINGNESS ##########

## ## Figure 4 was created in Stata -- please see that file

